* Creates long file from which HILDA mobility estimates will be obtained

* USES U FILES - TO CHECK CHANGE OF LGA, SD, State etc - for 2001-2006

* ---------------------------------------------------------------
* Session housekeeping: start with nothing in memory
* ---------------------------------------------------------------
clear
clear mata
clear matrix
set more off
pause off
set maxvar 30000

* ---------------------------------------------------------------
* Wave constants
*   maxwave  - number of the last wave to process
*   maxalpha - letter prefix corresponding to the last wave
*   alphabet - lookup string used to turn a wave number into its letter prefix
* ---------------------------------------------------------------
local alphabet abcdefghijklmnopqrstuvwxyz
local maxalpha p
local maxwave  16

* ---------------------------------------------------------------
* Directory locations (input data, replicate weights, work area, logs)
* ---------------------------------------------------------------
local hildadir  "X:\HILDA\Release 160\files\STATA 160u"
local repwtsdir "C:\Users\nw\Documents\HILDA Project\Data\HILDA Release\Release 16\Stata 160c"
local workdir   "X:\Nicole Watson\working"
local logdir    "C:\Users\nw\Documents\HILDA Project\ARC Methodology\UQ research\Chapter 3 - Mobility\Results"

* ---------------------------------------------------------------
* Open a fresh log (closing any log left open by an earlier run)
* ---------------------------------------------------------------
capture log close
log using "`logdir'\log_setupdata_alt.log", replace

***********************************
* Setup data
***********************************

* Create long file from combined datasets including both hhmove flag and respondent report of move
* Include characteristics to compare to ABS mobility estimates
* Number of replicate weights carried alongside each weight variable
local nreps 45

* Loop over waves: read each wave's combined file, harmonise variables whose
* names/codes differ between waves, strip the wave-letter prefix, attach the
* appropriate longitudinal weights and stack the result onto the long file.
* NOTE(review): renpfix strips the wave letter from EVERY variable that starts
* with it; this is safe for waves a-p but would also rename topup (wave t) and
* xwaveid/xhh* (wave x) if maxwave were ever extended that far.
forvalues wave = 1/`maxwave' {
  local w=substr("`alphabet'",`wave',1)
  local move1   `w'mhyr `w'mhmth `w'mhrea* `w'hhidate
  local move2p  `w'mhli `w'mhyr `w'mhmth `w'mhrea* `w'hhidate
  local char    `w'hgage `w'hgsex `w'hhtype `w'hhstate `w'hhsla `w'hhsd `w'hhsgcc `w'hsllord `w'hsmguse `w'hifefp `w'hifefn `w'hiwsfei `w'hibifip `w'hibifin `w'hifisi `w'hgint `w'hhwte `w'hhwtrp `w'rwe* `w'rwrp* `w'hhpers `w'hhadult `w'hhrih `w'mrcurr `w'anyoa `w'anbcob `w'esbrd `w'edhigh1
  local char1   `w'hstenur `w'hsrntby `w'dotype
  local char2_3 `w'hhmove `w'hstenr `w'dodtype `w'hhmovek `w'lnwtrp `w'lnwte `w'rwlnr* `w'rwlne* xhhstrat xhhraid
  local char4p  `w'hhmove `w'hstenr `w'dodtyp `w'hhmovek `w'lnwtrp `w'lnwte `w'rwlnr* `w'rwlne* xhhstrat xhhraid

  if `wave'==1 {
    use xwaveid `w'hhrhid `move1' `char' `char1' using "`hildadir'\combined_`w'`maxwave'0u", clear
    * for variables that differ over waves, make them equivalent
    * house tenure
    gen `w'hstenr=1 if `w'hstenur==1  // own / paying off
    replace `w'hstenr=2 if `w'hstenur==2 & `w'hsrntby!=1  // rent
    replace `w'hstenr=3 if `w'hstenur==2 & `w'hsrntby==1  // rent-buy
    replace `w'hstenr=4 if `w'hstenur==3  // rent free
    *bys `w'hsrntby: tab `w'hstenur `w'hstenr, miss
    drop `w'hstenur
    drop `w'hsrntby

    * dwelling type
    gen `w'dwell=1 if `w'dotype==1 // separate house
    replace `w'dwell=2 if `w'dotype==2 | `w'dotype==3 // semi-detached
    * Stata missing values compare greater than any number, so the open-ended
    * ">=13" test must exclude them or missing dwelling types become "flat".
    * Codes 14 and 15 are re-assigned to "other" by the next statement.
    replace `w'dwell=3 if (`w'dotype>=4 & `w'dotype<=9) | (`w'dotype>=13 & `w'dotype<.) // flat/unit/apartment
    replace `w'dwell=4 if (`w'dotype>=10 & `w'dotype<=12) | `w'dotype==14 | `w'dotype==15 // other
    *tab `w'dotype `w'dwell, miss
    drop `w'dotype
  }
  else if `wave'>=2 & `wave'<=3 {
    use xwaveid `w'hhrhid `move2p' `char' `char2_3' using "`hildadir'\combined_`w'`maxwave'0u", clear
    * for variables that differ over waves, make them equivalent
    * dwelling type
    gen `w'dwell=1 if `w'dodtype==2 // separate house
    replace `w'dwell=2 if `w'dodtype>=4 & `w'dodtype<=5 // semi-detached
    replace `w'dwell=3 if (`w'dodtype>=7 & `w'dodtype<=11) | `w'dodtype==15 // flat/unit/apartment
    replace `w'dwell=4 if `w'dodtype==14 | `w'dodtype==98  | `w'dodtype==3 | `w'dodtype==6 | `w'dodtype==12 | `w'dodtype==13 // other
    replace `w'dwell=5 if `w'dodtype==1 // non-private dwelling
    *tab `w'dodtype `w'dwell, miss
    drop `w'dodtype
  }
  else if `wave'>=4 {
    use xwaveid `w'hhrhid `move2p' `char' `char4p' using "`hildadir'\combined_`w'`maxwave'0u", clear
    * for variables that differ over waves, make them equivalent
    * dwelling type
    gen `w'dwell=1 if `w'dodtyp==3 // separate house
    replace `w'dwell=2 if `w'dodtyp>=5 & `w'dodtyp<=6 // semi-detached
    replace `w'dwell=3 if (`w'dodtyp>=8 & `w'dodtyp<=13) | `w'dodtyp==16 // flat/unit/apartment
    replace `w'dwell=4 if `w'dodtyp==15 | `w'dodtyp==98  | `w'dodtyp==4   | `w'dodtyp==7 | `w'dodtyp==14 // other
    replace `w'dwell=5 if `w'dodtyp==1 | `w'dodtyp==2 // non-private dwelling
    *tab `w'dodtyp `w'dwell, miss
    drop `w'dodtyp
  }

  * Flag the top-up sample (household ids beginning with 8 or 9) from wave 11
  * onwards; before wave 11 there is no top-up sample.
  if `wave'>=11 {
    gen topup=inlist(substr(`w'hhrhid,1,1),"8","9")
    * Set hhmove to missing for topup sample newly added in wave 11
    if `wave'==11 {
      replace `w'hhmove=. if topup==1
    }
  }
  else {
    gen topup=0
  }

  * Strip the wave-letter prefix so variables share names across waves
  renpfix `w'

  gen wave=`wave'

  * For people who age into being eligible for interview, include in longitudinal sample with appropriate weight
  * if wave=2 then for people 15 use hhwtrp for w2
  * if wave=3 then for people 16 use wlrb_c and for people 15 use hhwtrp for w3
  * if wave=4 then for people 17 use wlrb_d and for people 16 use wlrc_d and for people 15, use hhwtrp for w4
  * if wave=5 then for people 18 use wlrb_e and for people 17 use wlrb_e and for people 16 use wlrc_e and for people 15, use hhwtrp for w5, etc
  * (the loop body never runs for wave 1, so lnwtrpr/rwlnrr* only exist from wave 2)
  forvalues num=2 / `wave' {
    local n=substr("`alphabet'",`num',1)
    if `num'==2 {
      * start from the standard longitudinal weight and its replicates
      gen lnwtrpr=lnwtrp
      forvalues i=1/`nreps' {
        gen rwlnrr`i'=rwlnr`i'
      }
    }
    if `num'==`wave' {
      * 15 year olds in the current wave take the cross-sectional weight
      replace lnwtrpr=hhwtrp if hgage==15
      forvalues i=1/`nreps' {
        replace rwlnrr`i'=rwrp`i' if hgage==15
      }
    }
    else {
      * older entrants take the longitudinal weight for the panel starting in wave `num'
      display in red "about to merge to longitudinal weights"
      merge 1:m xwaveid using "`repwtsdir'\longitudinal_weights_p160_reps.dta", keepusing(xwaveid wlr`n'_`w'*)
      display in red "finished merge to longitudinal weights"
      replace lnwtrpr=wlr`n'_`w' if hgage==16+`wave'-`num'-1
      forvalues i=1/`nreps' {
        replace rwlnrr`i'=wlr`n'_`w'`i' if hgage==16+`wave'-`num'-1
      }
      * keep only people found in both files, then discard the merged-in weights
      keep if _merge==3
      drop wlr`n'_`w'* _merge
    }
  }

  * Save the single-wave file, then stack it onto the running long file
  save "`workdir'\ep`wave'", replace

  if `wave'==1 {
    save "`workdir'\eplong", replace
  }
  else {
    append using "`workdir'\eplong"
    save "`workdir'\eplong", replace
  }
}

* Record counts per wave in the stacked file
table wave

* Attach hhsm, ivwptn and the wave-prefixed distance-moved variables held on
* the master file (one master record per xwaveid, many long-file records)
merge m:1 xwaveid using "`hildadir'\master_`maxalpha'`maxwave'0u", keepusing(xwaveid hhsm *hhmovek ivwptn)
drop _merge

* Collapse the wave-prefixed distance-moved variables into a single column:
* each record takes the value belonging to its own wave (wave 2 onwards),
* after which the wave-specific variable is no longer needed.
gen pmovek=.
forvalues t = 2/`maxwave' {
  local pfx=substr("`alphabet'",`t',1)
  replace pmovek=`pfx'hhmovek if wave==`t'
  drop `pfx'hhmovek
}

* Declare the panel structure: numeric person id by wave
destring xwaveid, replace
tsset xwaveid wave
rename hgsex sex

***************************************
* create additional predictor variables
***************************************

* female indicator (1 when sex==2, otherwise 0)
gen female=(sex==2)

* powers of age: hgage2, hgage3, hgage4
forvalues p = 2/4 {
  gen hgage`p'=hgage^`p'
}

* Age - 5 year age groups
label define ag 1 "0-4" 2 "5-9" 3 "10-14" 4 "15-19" 5 "20-24" 6 "25-29" 7 "30-34" 8 "35-39" 9 "40-44" 10 "45-49" ///
    11 "50-54" 12 "55-59" 13 "60-64" 14 "65-69" 15 "70-74" 16 "75-79" 17 "80-84" 18 "85+" 
recode hgage ///
    (0/4=1)    (5/9=2)    (10/14=3)  (15/19=4)  (20/24=5)  (25/29=6) ///
    (30/34=7)  (35/39=8)  (40/44=9)  (45/49=10) (50/54=11) (55/59=12) ///
    (60/64=13) (65/69=14) (70/74=15) (75/79=16) (80/84=17) (85/105=18), gen(ageg)
label variable ageg "5-year age group"
label values ageg ag

* Age at wave 1 (at 30 June 2001) and 5 year age groups - will be negative for babies born after 30 June 2001
gen agew1=hgage-(wave-1)
label variable agew1 "Age at (at 30 June) in Wave 1"

label define ageg -1 "Not born" 1 "0-4" 2 "5-9" 3 "10-14" 4 "15-19" 5 "20-24" 6 "25-29" 7 "30-34" 8 "35-39" 9 "40-44" 10 "45-49" 11 "50-54" ///
    12 "55-59" 13 "60-64" 14 "65-69" 15 "70-74" 16 "75-79" 17 "80-84" 18 "85-100"
recode agew1 (-15/-1=-1) ///
    (0/4=1)    (5/9=2)    (10/14=3)  (15/19=4)  (20/24=5)  (25/29=6) ///
    (30/34=7)  (35/39=8)  (40/44=9)  (45/49=10) (50/54=11) (55/59=12) ///
    (60/64=13) (65/69=14) (70/74=15) (75/79=16) (80/84=17) (85/100=18), gen(agegw1)
label variable agegw1 "Age group at (at 30 June) in Wave 1"
label values agegw1 ageg

* Cross-tabulate wave by hhsm separately for main and top-up samples
bys topup: tab wave hhsm

* Year of arrival
* The interview year is taken as 2000+wave; the 10-year cut-off is 2000+wave-10.
*   1 = anbcob==1 (labelled "Born in Australia")
*   2 = arrived before the cut-off year
*   3 = arrived in the cut-off year or later, but not in the interview year
*   4 = arrived in the interview year
* Stata missing values compare greater than any number, so the ">= cut-off"
* test explicitly excludes a missing anyoa; without the guard those cases
* would be classified as "Arrived in last 10 years".
gen immig=1 if anbcob==1
replace immig=2 if anyoa>0 & anyoa<(2000+wave-10)
replace immig=3 if anyoa>=(2000+wave-10) & anyoa<. & anyoa != (2000+wave)
replace immig=4 if anyoa == (2000+wave)

label variable immig "Immigration group"
label define immigg 1 "Born in Australia" 2 "Arrived more than 10 years ago" 3 "Arrived in last 10 years" 4 "Arrived current year"
label values immig immigg
* 99 interviewed cases missing anyoa or anbcob

tab wave immig, miss

* wave group
* Codes must match the value label below: waves 11-16 are group 5
* (previously recoded to 6, which has no label).
recode wave (1=1) (2/3=2) (4/5=3) (6/10=4) (11/16=5), gen(wavegrp)
label variable wavegrp "Grouping of waves"
label define waveg 1 "1" 2 "2-3" 3 "4-5" 4 "6-10" 5 "11-16"
label values wavegrp waveg

***************************************
* create additional move indicators
***************************************

* set up move indicator for last 12 months (this is in addition to hhmove which flags cases that moved since last wave when HF structure date)
* The time-series lag operator (l.) used below needs the data in panel order.
sort xwaveid wave
* Interview month and year are read from character positions 4-5 and 7-10 of hhidate
* (presumably a dd/mm/yyyy string - confirm against the data dictionary)
gen ivwmth=real(substr(hhidate,4,2))
gen ivwyr=real(substr(hhidate,7,4))
* A move counts as "within 12 months" when it is dated in the interview year in a month
* before the interview month, or in the previous year in the interview month or later.
* NOTE(review): a move dated in the interview month of the interview year is not counted - confirm this is intended.
gen move12m=1 if wave==1 & ((mhyr==ivwyr & mhmth<ivwmth) | (mhyr==ivwyr-1 & mhmth>=ivwmth))
replace move12m=0 if wave==1 & move12m==.
replace move12m=1 if wave>1 & ((mhyr==ivwyr & mhmth<ivwmth) | (mhyr==ivwyr-1 & mhmth>=ivwmth))  // moved since last interview
replace move12m=1 if wave>1 & (l.mhyr==ivwyr-1 & l.mhmth>=ivwmth)  // moved before last interview but within 12 months
* NOTE(review): "wave>1" is also true when wave is missing, so any record without a wave is set by these statements too
replace move12m=0 if wave>1 & move12m==.
table wave move12m, miss
* set up person-level move indicator since last interview (based on distance moved)
* pmove is 1 for a positive non-missing distance, 0 for a zero distance, otherwise missing
*table pmovek if pmovek<5
gen pmove=1 if (pmovek>0 & pmovek!=.) 
replace pmove=0 if pmovek==0 
* Modify indicator if person-level distance moved is less than 0.5 km.
sort xwaveid wave
* previous wave's hgint (not referenced again in this section)
gen lasthgint=l.hgint
* "pmovek>=0" is also true for missing pmovek; egen sum() treats missing values as zero
bys wave hhrhid: egen pmvinhh=sum(pmovek) if pmovek>=0  // sum of distances people in household moved
replace pmove=1 if hhmove==1 & hhmovek==0 & pmovek==0 & pmvinhh==0 // flag cases that have moved less than 0.5km at the household level if there is noone else who moved a distance in the household
* Note - the above line does not pick up partial household moves of less than 0.5km
table wave pmove, miss
* Unweighted summary of the three move indicators by wave, restricted to records with a positive lnwtrp
bys wave: sum hhmove pmove move12m if lnwtrp>0 & lnwtrp!=.

* set up move indicators for last 5 and last 10 years
* For each base indicator (hhmove, pmove, move12m) and each span (5, 10 waves):
*   <indicator><span> = 1 if the indicator is 1 in the current wave or any of the
*                         previous span-1 waves AND it is non-missing in all of them
*                     = 0 if the indicator is 0 in the current wave and all of the
*                         previous span-1 waves
*                     = . otherwise (any wave in the window missing)
* The conditions are assembled wave by wave so the 5- and 10-year versions
* cannot drift apart; every indicator (including move12m5) is tabulated.
* The lag operators require the data to be in panel order.
sort xwaveid wave
foreach span in 5 10 {
  local maxlag = `span' - 1
  foreach v in hhmove pmove move12m {
    * start each condition with the current wave ...
    local anymove "`v'==1"
    local allobs  "`v'!=."
    local allstay "`v'==0"
    * ... then extend it with lags 1 to span-1 (l1.x is the same as l.x)
    forvalues k = 1/`maxlag' {
      local anymove "`anymove' | l`k'.`v'==1"
      local allobs  "`allobs' & l`k'.`v'!=."
      local allstay "`allstay' & l`k'.`v'==0"
    }
    gen `v'`span'=1 if (`anymove') & (`allobs')
    replace `v'`span'=0 if `allstay'
    tab `v'`span' wave, col
  }
}
  
* Save the analysis file built so far
save "`workdir'/move_long_alt", replace

* workdir is declared again and the file re-loaded (presumably so the
* tabulations below can be run without rebuilding the file - confirm)
local workdir  X:\Nicole Watson\working
use "`workdir'/move_long_alt", clear

* survey setup
svyset [pweight=lnwte], vce(jackknife) jkrweight(rwlne*)

capture drop dmove*
* set up where moved to var
*   0 = did not move (pmove==0)
*   1-4 = mover, classified by comparing current geography with the previous wave's
*   5 = mover whose destination cannot be classified
* A current-wave code of -7 on hhsla/hhsd is excluded from categories 1-4.
* In Stata "x != l.x" is TRUE when l.x is missing, so categories 2-4 also
* require the previous wave's geography to be present; otherwise a mover with
* no previous-wave record would be counted as an interstate mover instead of
* falling through to "undefined".
* NOTE(review): a previous-wave value of -7 is still compared as an ordinary code - confirm whether it should also be excluded.
sort xwaveid wave
gen dmove=.
replace dmove=0 if pmove==0
replace dmove=1 if pmove==1 & hhsla==l.hhsla & hhsla!=-7  // moved to same SLA
replace dmove=2 if pmove==1 & hhsla!=l.hhsla & hhsd==l.hhsd & hhsla!=-7 & hhsd!=-7 & !missing(l.hhsla, l.hhsd, l.hhstate)  // moved to diff SLA but same SD
replace dmove=3 if pmove==1 & hhsla!=l.hhsla & hhsd!=l.hhsd & hhstate==l.hhstate & hhsla!=-7 & hhsd!=-7 & !missing(l.hhsla, l.hhsd, l.hhstate)  // moved to diff SD but same state
replace dmove=4 if pmove==1 & hhsla!=l.hhsla & hhsd!=l.hhsd & hhstate!=l.hhstate & hhsla!=-7 & hhsd!=-7 & !missing(l.hhsla, l.hhsd, l.hhstate)  // moved to diff state
replace dmove=5 if pmove==1 & dmove==.  // undefined
* Unweighted, weighted and design-based distributions (wave 6)
tab dmove, miss
tab dmove [aweight=lnwte] if wave==6
svy: tab dmove if wave==6, se
svy, subpop(pmove): tab dmove if wave==6 , se col

* Destination of movers over the 5-wave window: current geography is compared
* with the geography recorded five waves earlier (l5.).
*   0 = did not move (pmove5==0)
*   1-4 = mover, classified by SLA / SD / state comparison
*   5 = mover whose destination cannot be classified
* In Stata "x != l5.x" is TRUE when l5.x is missing, so categories 2-4 also
* require the geography from five waves earlier to be present; otherwise a
* mover with no record in that wave would be counted as an interstate mover
* instead of falling through to "undefined".
* NOTE(review): a value of -7 in the earlier wave is still compared as an ordinary code - confirm whether it should also be excluded.
sort xwaveid wave
gen dmove5=.
replace dmove5=0 if pmove5==0
replace dmove5=1 if pmove5==1 & hhsla==l5.hhsla & hhsla!=-7  // moved to same SLA
replace dmove5=2 if pmove5==1 & hhsla!=l5.hhsla & hhsd==l5.hhsd & hhsla!=-7 & hhsd!=-7 & !missing(l5.hhsla, l5.hhsd, l5.hhstate)  // moved to diff SLA but same SD
replace dmove5=3 if pmove5==1 & hhsla!=l5.hhsla & hhsd!=l5.hhsd & hhstate==l5.hhstate & hhsla!=-7 & hhsd!=-7 & !missing(l5.hhsla, l5.hhsd, l5.hhstate) // moved to diff SD but same state
replace dmove5=4 if pmove5==1 & hhsla!=l5.hhsla & hhsd!=l5.hhsd & hhstate!=l5.hhstate & hhsla!=-7 & hhsd!=-7 & !missing(l5.hhsla, l5.hhsd, l5.hhstate) // moved to diff state
replace dmove5=5 if pmove5==1 & dmove5==.  // undefined
* Unweighted, weighted and design-based distributions (wave 6)
tab dmove5, miss
tab dmove5 [aweight=lnwte] if wave==6
svy: tab dmove5 if wave==6, se
svy, subpop(pmove5): tab dmove5 if wave==6 , se col

log close
